setwd("~/Documents/PostDocMLW/JenConnick/COVID")
rm(list=ls())

##=========================================================================================
#Loading packages 
##=========================================================================================
load.pckgs<-c("foreign", "reshape2","lattice", "limma", "readxl", "multtest","rocc",
             "corrplot",  "sva","WGCNA","gplots","ggplot2", "glmnet",
             "chron","openxlsx", "metaMA", "metap", "epiR", "arsenal")

load.pckgs
for(x in 1:length(load.pckgs))    
  library(package = load.pckgs[x], character.only = TRUE)

##=========================================================================================
#Data management
##=========================================================================================
##(1)Loading data
covid.df <- na.omit(read_excel("CovidData2020Nov_All_FDT_working doc.xlsx",sheet = "CovidData2020Nov"))
names(covid.df)

##Sample_ID
(covid.df$Sample_ID<-covid.df$SAMPLEName)

##----------------------------------------------------------------------------------------
##HIV status
##----------------------------------------------------------------------------------------
table(covid.df$hiv)
unique(covid.df$hiv)
covid.df$HIV_status<-sapply(covid.df$hiv, function(x) 
  switch(x, "Seropositive"="Positive","Non-reactive"="Negative",   "NA"=NA, "Status unknown"=NA))
table(covid.df$hiv,covid.df$HIV_status)

##----------------------------------------------------------------------------------------
##Comparison groups
##----------------------------------------------------------------------------------------
##(1) Diagnostic groups
table(covid.df$stage); table(is.na(covid.df$stage))

covid.df$DiagGroups<-sapply(covid.df$stage, function(x) switch(x,
                "EARLY"="PCR-confirmed COVID-19",  "LATE"="PCR-IgG+ SARI","SARI"="PCR-IgG- SARI"))
table(covid.df$DiagGroups)

##Group colours
covid.df$DiagGroupCols<-sapply(covid.df$DiagGroups, function(x)
        switch(x,"PCR-IgG- SARI"="#B95AFF", "PCR-IgG+ SARI"= "#01B1BA", "PCR-confirmed COVID-19"="#61A440"))
table(covid.df$DiagGroups,covid.df$DiagGroupCols)

##(2)PCR test
table(is.na(covid.df$mlw_covid_test_result)); table(covid.df$mlw_covid_test_result)
table(is.na(covid.df$qech_covid_test_result)); table(covid.df$qech_covid_test_result)
table(covid.df$PCRtest<-ifelse((covid.df$qech_covid_test_result%in%"Positive" | 
                                  covid.df$mlw_covid_test_result%in%"Positive"),"Positive", "Negative"))
table(covid.df$PCRtest, covid.df$DiagGroups)


##(3)ISARIC Covid severity
class(covid.df$ISARICSeverity)
covid.df$ISARICSeverity<-as.numeric(covid.df$ISARICSeverity)
hist(covid.df$ISARICSeverity)
table(covid.df$CovidSeverity<-ifelse(covid.df$ISARICSeverity<6, "Non-Severe(IS<6)", "Severe(IS>=6)"))

##----------------------------------------------------------------------------------------
##Pathogens
##----------------------------------------------------------------------------------------
##Varible labels
pathoIDs=c(FLU_A="Influenza A virus", FLU_B="Influenza B virus", 
        RHINO="Rhinovirus", COR_299="Coronavirus_299", COR_43="Coronavirus_43",
        COR_63= "Coronavirus_63",HKU1="Coronavirus_HKU1", PARA_1="Parainfluenza1 virus",
        PARA_2="Parainfluenza2 virus", PARA_3="Parainfluenza3 virus", PARA_4="Parainfluenza4 virus",
        HBoV="Bocavirus",MPNE="Mycoplasma pneumoniae",HMPV_A.B="Metapneumovirus A & B",
        AV="Adenovirus",  EV="Enterovirus", HPEV="Parechovirus",RSVA.B="Respiratory syncytial virus", 
        C.PNEU="Chlamydophila pneumoniae",HiB="Haemophilus influenzae type b", 
        S.AUR="Staphylococcus aureus",S.PNEU="Streptococcus pneumoniae",
        BORD="Bordetella pertussiss", FLUC="Influenza C virus", HAEINF="Haemophilus influenzae",
        MORAX="Moraxella catarrhalis",K.PNEU="Klebsiella pneumoniae",
        LEGIO="Legionella longbeachae",PCP="Pneumocystis jiroveci",SALM="Salmonella")

apply(covid.df[,names(pathoIDs)],2, function(x) sum(is.na(x)))

##Number of positive isolates
hist(covid.df$Pathogens, main="Number of isolates"); table(is.na(covid.df$Pathogens))
table(covid.df$Pathogens, covid.df$Pathogens2)
covid.df$Pathogens2<-sapply(covid.df$Pathogens, function(x) 
                              ifelse(x==0,".None", ifelse(x==1, ".Single", "Multiple")))
table(covid.df$Pathogens, covid.df$Pathogens2)

##Viruses and bacteria
(viruses1<-grep(pattern = "virus",x =  pathoIDs,ignore.case = TRUE,value = TRUE))
(bacteria1<-pathoIDs[!names(pathoIDs)%in%names(viruses1)])

##----------------------------------------------------------------------------------------
##Co-infections
##----------------------------------------------------------------------------------------
table(covid.df$AnyBacterialInfection)
table(covid.df$AnyViralInfection)
table(covid.df$DiagGroups, covid.df$BacterialCoInfection)
table(covid.df$DiagGroups, covid.df$ViralCoInfection)

##=========================================================================================
##Analysis
##=========================================================================================
##----------------------------------------------------------------------------------------
##(1)Table1 Descriptive results 
?tableby
##----------------------------------------------------------------------------------------
names(covid.df)
(xx1<-apply(covid.df[,names(pathoIDs)],2, function(x) sum(x%in%"Positive")))
(pathoIDs2<-pathoIDs[xx1>0])

var.labs2<-c(DiagGroups="Diagnotic groups",  ISARICSeverity= "ISARIC severity score",
             CovidSeverity="COVID severity", Month="Month of recruitment",
             PCRtest="Overall PCR results", antibody_test_result="Antibody test result",
             HIV_status="HIV status", antibiotic="Antibiotic", outcome="Discharge Outcome",
             AnyViralInfection="Any viral infection",AnyBacterialInfection="Any bacterial infection",
             pathoIDs2, Pathogens="Number of isolates", Pathogens2="Number of isolates")
var.labs2

#Analysis
dim(tab1.df<-covid.df[,names(var.labs2)])
tab1<- as.data.frame(summary(tableby(DiagGroups ~ ., data =tab1.df ), 
                             text=NULL, labelTranslations =var.labs2))

##----------------------------------------------------------------------------------------
##Plot items
##----------------------------------------------------------------------------------------
###Group colors
(group.cols1<-tapply(covid.df$DiagGroupCols, covid.df$DiagGroups, unique))

##Group size
(n1<-sort(table(covid.df$DiagGroups),decreasing = TRUE))

##Legend labels 
(legend1<-paste(names(n1), "(n=", n1, ")", sep = ""))   

(legend1.cols<-c("PCR-confirmed COVID-19"= "#61A440","PCR-IgG+ SARI"= "#01B1BA","PCR-IgG- SARI"= "#B95AFF"))

##=========================================================================================
##Individual isolates Barplots 
# PCR-confirmed COVID-19 = 3F7F02; PCR-IgG+ SARI = 107F80;# PCR-IgG- SARI= 8000FE
##=========================================================================================
##Pathogen with at least one positive results
patho.mat<-covid.df[, names(pathoIDs2[-1])]
(pos.pathos1<-sort(apply(patho.mat,2, function(x) sum(x%in%"Positive")), decreasing = TRUE))
(pos.pathos2<-names(pos.pathos1[pos.pathos1>0]))

##Barplot table
table(covid.df$DiagGroups)
table(covid.df$DiagGroups, covid.df$K.PNEU)

##Frequencies
(bp1.tab<-as.matrix(apply(covid.df[,pos.pathos2], 2, function(y)
   tapply(y, covid.df$DiagGroups, function(k) sum(k%in%"Positive"))))[names(n1),])

##Proportions
(bp1.tab1<-round(100*sweep(x = bp1.tab,MARGIN = 1,STATS=n1, FUN = "/"),2))

##Bacteria
(bp1.bacteria.tab1<-bp1.tab1[,colnames(bp1.tab1)%in%names(bacteria1)])
colnames(bp1.bacteria.tab1)<-bacteria1[colnames(bp1.bacteria.tab1)]; bp1.bacteria.tab1

#Virues
(bp1.virus.tab1<-bp1.tab1[,colnames(bp1.tab1)%in%names(viruses1)])
colnames(bp1.virus.tab1)<-viruses1[colnames(bp1.virus.tab1)]; bp1.virus.tab1

##Merging
(bp1.tab2<-cbind(bp1.bacteria.tab1, NA, bp1.virus.tab1))
##Order
(groups.order<-c("PCR-IgG- SARI","PCR-IgG+ SARI", "PCR-confirmed COVID-19"))
(bp1.tab2<-bp1.tab2[groups.order, ])


##The plot
pdf(file = "Barplot_IndividualIsolates.pdf",  width = 14, height = 12)
par(mar=c(7, 20, 4,2))
bpx<-barplot(bp1.tab2, beside=TRUE, las=1, hor=TRUE, xlim=c(0, 60), 
             col=group.cols1[groups.order], cex.names=1.5, cex.axis=1.5)
mtext(text = "Positivity rate (%)", side = 1, line = 5, cex=2, font=2)
mtext(text = c("Bacteria", "Viruses"), at = c(10, 42), side = 2, line =18 , cex=2, font=2)

#Legend
legend("topright",legend =legend1,fill=legend1.cols, cex=1.5, pt.cex=2.5, title="Groups")
par(mar=c(5, 4, 4,2))

dev.off()

##=========================================================================================
##Isolates combinations Barplot function 
##=========================================================================================

##Test input::(pos.iso.list1<-pos.pathos2); title1<-"title1"
IsoCombBarPlot.fn<-function(pos.iso.list1, title1){ 

dim(iso.dat1<-na.omit(covid.df[,c( "Sample_ID", "DiagGroups", pos.iso.list1)]))
(vd.mat<-vennCounts(apply(iso.dat1[,-c(1:2)],2, function(x) 1*(x%in%"Positive"))))
(yy2<-vd.mat[,"Counts"])
table(yy2>0)
dim(vd.mat2<-vd.mat[yy2>0,]); class(vd.mat2)
(vd.df<-data.frame(ID=row.names(vd.mat2), vd.mat2))

#Pathogen combinations
(plist1<-apply(vd.mat2[,pos.iso.list1],1, function(x) pos.iso.list1[x==1]))
(plist2<-plist1[names(sort(sapply(plist1,length)))])
(plist2.names<-sapply(plist2, function(x) paste(x, collapse = " || ")))
(plist.combs<-setNames(plist2, plist2.names)[-1])

##-----------------------------------------------------------------------------------------
##Isolates combination data.frame
##-----------------------------------------------------------------------------------------
##Mini function
##Test input:(isos1<-plist.combs$`K.PNEU || S.PNEU || RHINO || HAEINF`)
IsoComb.fn<-function(isos1){ 
(isos2<-setdiff(names(iso.dat1), isos1))
 head(idf0<-data.frame(DV="Negative", iso.dat1[,isos1], stringsAsFactors = FALSE))
 (k<-length(isos1))
table(yvar.out<-apply(idf0,1, function(x) sum(x%in%"Positive")==k) &
             apply(iso.dat1[,isos2],1, function(x) sum(x%in%"Positive")==0))

return(yvar.out)
}

###Application of the IsoComb.fn function
plist.combs
iso.dat2<-sapply(plist.combs, function(x) IsoComb.fn(isos1 = x))
apply(iso.dat2,2, sum)

##(1)Negatives**
names(iso.dat1)
sum(nega0<-apply(iso.dat1,1, function(x) sum(x%in%"Positive")==0))
iso.df<-data.frame(Sample_ID=iso.dat1$Sample_ID, DiagGroups=iso.dat1$DiagGroups,
                   Negative=apply(iso.dat1,1, function(x) sum(x%in%"Positive")==0),
                   iso.dat2, stringsAsFactors = FALSE,check.names = FALSE)

##Barplot matrix 
#Frequencies
table(iso.df$COVIDstatus)
apply(iso.df[,-c(1:2)],2, sum)
   (bpy.mat1<-apply(iso.df[,-c(1:2)],2, function(y)  tapply(y, iso.df$DiagGroups, sum)))
(bpy.mat2<-bpy.mat1[,apply(bpy.mat1,2,sum)>0])
dim(bpy.mat1); dim(bpy.mat2)

#Proportions
(bpy.mat3<-round(100*sweep(x=bpy.mat2,MARGIN = 1, STATS = table(iso.df$DiagGroups),FUN = "/"),2))
(bpy.mat3<-bpy.mat3[groups.order,])
#The plot
par(mar=c(7, 20, 4,2))
bpx<-barplot(bpy.mat3, beside=TRUE, las=1, hor=TRUE, xlim=c(0, max(bpy.mat3)+15), main=title1,
             col=group.cols1[groups.order], cex.names=1, cex.axis=1.5)
mtext(text = "Positivity rate (%)", side = 1, line = 5, cex=2, font=2)
mtext(text = "Isolate combinations", side = 2, line =15 , cex=2, font=2)

##Legend
legend("topright",legend = legend1, fill=legend1.cols, cex=1.5, pt.cex=2.5, title="Groups")
par(mar=c(5, 4, 4,2))

##THE END

return(iso.df)
}

##=========================================================================================
##Application of the IsoCombBarPlot.fn function
##=========================================================================================

##(1) With Kleb
pdf(file = "Barplot_IsolatesComb.pdf",  width = 14, height = 12)
pos.pathos2
iso.comb.df1<-IsoCombBarPlot.fn(pos.iso.list1 =pos.pathos2 , title1="") 
dev.off()

##(2) Without Kleb
pdf(file = "Barplot_IsolatesCombNoKleb.pdf",  width = 14, height = 12)
pos.pathos2
iso.comb.df2<-IsoCombBarPlot.fn(pos.iso.list1 =pos.pathos2[-1] , title1="No Kleb") 
dev.off()

##(3) Comparisons (p-values)
(iso.tab1<- as.data.frame(summary(tableby(DiagGroups ~ ., data =iso.comb.df1[,-1]),text=NULL)))
(iso.tab2<- as.data.frame(summary(tableby(DiagGroups ~ ., data =iso.comb.df2[,-1]),text=NULL)))

##=========================================================================================
##Pairwise comparsions p-values
##=========================================================================================
##Test input::dat1<-covid.df; yvar<-"K.PNEU"; group.var<-"DiagGroups"; ypos<-"Positive"

PairWiseStats.fn<-function(dat1, yvar, group.var,ypos) { 
  ##(1)Data management
  table(is.na(dat1[group.var]), is.na(dat1[,yvar]))
  table(is.na(dat1[group.var]), is.na(dat1[,yvar]))
  head(dat2<-data.frame(Group=dat1[,group.var],
                        Y=factor(sapply(dat1[,yvar],function(x) ifelse(x%in%ypos,1,0)))))
  dim(dat2) 
    names(dat2)<-c("Group", "Y"); head(dat2)
    table(dat2$Group)
  dat2$Group2<-relevel(x = factor(dat2$Group),ref = names(table(dat2$Group))[2])
  table(dat2$Group2)

  ##(2)Frequency and proportions
    (n11<-tapply(dat2$Y, dat2$Group, length))
    (k11<-tapply(dat2$Y, dat2$Group, function(x) sum(x%in%1)))
    (p11<-round(100*k11/n11, 2))
    (pval1<-as.numeric(fisher.test(x=dat2$Group, y=dat2$Y)["p.value"]))
    
  ##(3) Odds ratios and confidence intervals
 summary(mylogit1 <- glm(Y ~ Group ,  data = dat2, family = "binomial"))
 summary(mylogit2 <- glm(Y ~ Group2 ,  data = dat2, family = "binomial"))
 
 (OR11<-cbind(OR = coef(summary(mylogit1)), confint(mylogit1)))
 (OR12<-cbind(OR = coef(summary(mylogit2)), confint(mylogit2)))
 (OR2<-rbind(OR11, OR12[3,]))

 (odds.tab<-data.frame(OR=exp(OR2[,"Estimate"]), LB=exp(OR2[,"2.5 %"]), UB=exp(OR2[,"97.5 %"]),
                          SE=OR2[,"Std. Error"], Z=OR2[,"z value"], Pvalue2=OR2[,"Pr(>|z|)"]))
 odds.tab[1,1:6]<-c(1, NA,NA, NA,NA,NA); odds.tab
 
  ##(4) Organising groups
(group.names<-names(table(dat2$Group)))
 (groups1<-c(group.names, group.names[3]))
 (groups2<- c(paste(group.names, " VS ",group.names[1], sep=""),
              paste(group.names[3], " VS ", group.names[2], sep="")))
    
##Final data frame
(output.df<-data.frame(Pathogen=yvar, Groups=groups1, Total=n11[groups1], Positive=k11[groups1],
                      Percent=p11[groups1],Pvalue=pval1, Comparisons=groups2, odds.tab))
 
 ##END

  return(output.df)
}

##----------------------------------------------------------------------------------------
##Application of the PairWiseStats.fn function
##----------------------------------------------------------------------------------------
##(1)Single pathogens
pos.pathos2
(patho.res.df1<-Reduce(rbind.data.frame, lapply(pos.pathos2, function(x)
  PairWiseStats.fn(dat1 = covid.df, yvar = x,group.var = "DiagGroups", ypos = "Positive"))))

###Pathogen combinatons
names(iso.comb.df1)
(pathocomb.res.df1<-Reduce(rbind.data.frame, lapply(names(iso.comb.df1)[-c(1:3)], function(x)
  PairWiseStats.fn(dat1 = iso.comb.df1, yvar = x,group.var = "DiagGroups", ypos = "TRUE"))))

#=========================================================================================
##HeatMap
## PCR-confirmed COVID-19 = 3F7F02; PCR-IgG+ SARI = 107F80;# PCR-IgG- SARI= 8000FE
#=========================================================================================
source("Heatmap3Function.R")

##Sorting variable
table(covid.df$DiagGroups)
table(covid.df$DiagGroups2<-sapply(covid.df$DiagGroups, function(x) 
  switch(x, "PCR-confirmed COVID-19"=3, "PCR-IgG+ SARI"=2, "PCR-IgG- SARI"=1)))
table(covid.df$DiagGroups, covid.df$DiagGroups2) 
 
##Heatmap data
 hm.df<-covid.df[order(covid.df$DiagGroups2, covid.df$HIV_status),]
 hm.dat<-apply(hm.df[,c("HIV_status", names(pathoIDs))],2, function(x) ifelse(x%in%"Positive",1,0))
 dim(hm.dat)
 row.names(hm.dat)<-hm.df$Sample_ID

 table(patho.cols<-ifelse(colnames(hm.dat)%in%names(viruses1), "slategray1", "purple"))

 (pathos.order1<-c(HIV_status="HIV positive", viruses1, bacteria1))
 
 ##Colours
 cols.df<-cbind(Bacteria=ifelse(hm.df$AnyBacterialInfection%in%"Positive", "blue2", "white"),
                Viruses=ifelse((hm.df$AnyViralInfection%in%"Positive" | hm.df$HIV_status%in%"Positive"),
                               "gold2", "white"), Groups= hm.df$DiagGroupCols)
 row.names(cols.df)<-hm.df$Sample_ID; head(cols.df)

 ##The plot
pdf(file = "HeatMap.pdf",  width = 14, height = 12)
 dim(hm.dat2<-t(hm.dat[,names(pathos.order1)]))
 heatmap.3(hm.dat2, col = c("lightgray","red4"),ColSideColors = cols.df,  Rowv = FALSE, Colv = FALSE,
           labCol =hm.df$Sample_ID, mar=c(8, 16),  key = FALSE, dendrogram = "none",
           rowsep = length(viruses1)+1, labRow =pathos.order1,
           colsep = cumsum(sort(n1)))
 
 mtext(text="Samples",side = 1, line=4, cex=2, font=2)
 mtext(text="Isolates",side = 4, line=0, cex=2, font=2)
##Legend
 (legend2<-c("PCR-confirmed COVID-19", "PCR-IgG+ SARI","PCR-IgG- SARI", 
             "Viral infection", "Bacterial infection","FTD positive", "FTD Negative"))
 legend("left", legend = , legend2, y.intersp = 1.3, cex=1, pt.cex=4, title = "ColorKey",
        fill=c( legend1.cols, "gold2", "blue2", "red4", "lightgray"))

dev.off()
 
 ##---------------------------------------------------------------------------------------
 ##Basic Boxplot function
 #----------------------------------------------------------------------------------------
 ##Test inpt:: table(bp.filter<-covid.df$PCRtest%in%"Negative"); title1<-"Title"
 
 Boxplot2.fn<-function(bp.filter, title1) { 
  
   ##(1)Subsetting data
   table(bp.filter)
   table(patho.filter<-!is.na(covid.df$K.PNEU))
   table(dat.filter<-bp.filter & patho.filter)
   dim(bp.df1<-covid.df[dat.filter,])
   
   ##(2)Frequencies
   #(a)Total
   (kk0<-apply(bp.df1[,pos.pathos2],2, function(x) sum(x%in%"Positive")))
   (kk0.keep<-sort(kk0[kk0>0],decreasing = TRUE))
   
   #(b)Stratified by co-infection status
   table(bp.df1$K.PNEU, bp.df1$Pathogens2) ##Testing 
   (kk1<-apply(bp.df1[,names(kk0.keep)],2, function(x) sum(x%in%"Positive" & bp.df1$Pathogens<=1)))
   (kk2<-apply(bp.df1[,names(kk0.keep)],2, function(x) sum(x%in%"Positive" & bp.df1$Pathogens>=2)))
   
   (freq.tab1<-rbind(Single=kk1, Multiple=kk2, Total=kk0.keep))
   ##(3)Percentages
   (prop.tab1<-round(100*(freq.tab1/nrow(bp.df1)),1))
   
   ##(4)The plot 
   (bpy.names<-pathoIDs[colnames(prop.tab1)])
   (bpy<-prop.tab1[c("Single", "Multiple"),])
   
   table(bp.df1$Pathogens2)
   
   par(mar=c(18,8,6,2))
   barplot(bpy, beside=FALSE,legend=TRUE,names=bpy.names, cex.names=1.5, col=c("green4", "blueviolet"),
           ylim=c(0,100), cex.axis=1.5,  cex.lab=2,cex.main=1.8, las=2, font.lab=2,
           ylab="Positivity rate (%)", main=c(title1,"", paste("(n=", nrow(bp.df1), ")", sep="")),
           args.legend=list(x="topleft", cex=1.5, pt.cex=4,y.intersp=1.2, title="Infections"))
   mtext(text = "Isolates", side=1, line=16, cex=2,font=2)
   par(mar=c(5,4,4,2))
   
   ##Output
   (output<-list(n= nrow(bp.df1), freq=freq.tab1, prop=prop.tab1))
   return(output)
 }
 
 ##--------------------------------------------------------------------------------------- 
 ##Application of the
 #----------------------------------------------------------------------------------------
 table(covid.df$PCR_AB)
 table(covid.df$PCR_AB, covid.df$K.PNEU)
 pdf(file="Barplot_ForEachDiagGroups.pdf", width = 12, height = 12)
  par(mfrow=c(1,3))
  #(a)PCR postive
  Boxplot2.fn(bp.filter =covid.df$DiagGroups%in%"PCR-confirmed COVID-19", title1="PCR+ confirmed COVID-19")
  
  ##(b)Negative-Positive
  Boxplot2.fn(bp.filter =covid.df$DiagGroups%in%"PCR-IgG+ SARI",title1 = "PCR-IgG+ SARI")
  
  ##(c)Negative-Negative
  Boxplot2.fn(bp.filter =covid.df$DiagGroups%in%"PCR-IgG- SARI",title1 = "PCR-IgG- SARI")
  par(mfrow=c(1,1))
 dev.off() 
 
 ##=======================================================================================
 ##Diagnostic barplot (COVID meeting 20/01/2010)
 ##=======================================================================================
 #PCR+ confirmed COVID-19 = #3F7F02; PCR-IgG+ SARI = #107F80; PCR-IgG- = #8000FE
  ##------------------------------------------------------------------------------------
 ##(2)Barplot function
 ##------------------------------------------------------------------------------------
##Test inputs:: patho.var<-"K.PNEU"; Title1<-"Title1"
 DiagBarPlot.fn<-function(patho.var, Title1) { 
  ##(1)Data management
   ##(a)Extracting eligible samples and variables)
 table(is.na(patho.yy1<-covid.df[patho.var])); table(patho.yy1<-covid.df[,patho.var])
 table(!is.na(patho.yy1), !is.na(covid.df$DiagGroups))
 table(df.filter<-!is.na(patho.yy1) & !is.na(covid.df$DiagGroups))
 dim(df1<-covid.df[df.filter, c("HIV_status", "DiagGroups", patho.var)])
 names(df1)<-c("HIV","Groups","Pathogen")
    
   ##(b)Data list
  sapply(df.list<-c( split(x=df1, f = df1$HIV),Total=list(df1)),dim)
  names(df.list)<-c("Neg", "Pos", "All"); sapply(df.list,dim)
  
  ###(2)Barplot tables
    ftable(df1[,-1]);   ftable(df1)
  #(a) Totals (denominators)
  table(df1$Groups)
  table(df1$HIV,df1$Groups)
  (freq.n1<- sapply(df.list, function(x) table(x$Groups))[groups.order,])
  
#(b)Positive cases
  (freq.k1<-sapply(df.list, function(x) 
     tapply(x$Pathogen ,x$Groups, function(y) sum(y%in%"Positive")))[groups.order,])

#(c)Proportions
  freq.n1; freq.k1
  (freq.p1<-round(100*freq.k1/freq.n1,2))

  ##(3) P-values
(p.vals1<-round(sapply(df.list, function(x)
    as.numeric(fisher.test(x=x$Groups, y=x$Pathogen)["p.value"])),3))
  #Names
  (n01<-sapply(df.list, nrow))
  (bp.names1<-paste(names(n01), "(n=", n01,")",sep=""))
  
  ##(4)The barplot
  par(mar=c(8,8,4,2))
    bpx<-barplot(freq.p1, beside=TRUE, ylim=c(0,100),  main=Title1, las=1,cex.main=2, font.main=4,
               cex.axis=1.5, cex.names=1.5, names=bp.names1,col=group.cols1[groups.order])
    
 #Legend
  legend("topleft", legend = legend1, fill=legend1.cols, cex=1.5, pt.cex=3, y.intersp=1.2)

  ##Lables
  mtext(text = "HIV status", side = 1, line = 5, cex=2, font=2)
  mtext(text = "Positivity rate (%)", side = 2, line = 5, cex=2, font=2)
  text(x = bpx, y=5, labels =paste("n=",freq.n1,sep=""), cex=1.4, font=1, srt=90)
  text(x = bpx[2,], y=apply(freq.p1,2,max), 
       labels = paste("Pvalue=",p.vals1,sep=""),pos=3, cex=1.4, font=3)
  #grid(nx = 0, ny=10)
  par(mar=c(5,4,4,2))

  ##Output
 (output<-list(n=n1, Freqn=freq.n1, Freq.k=freq.k1, freq.p=freq.p1, Pvalues=p.vals1))
 return(output)
 }
 
 ##------------------------------------------------------------------------------------
 ##Barplot function
 ##------------------------------------------------------------------------------------
 ##(1) Streptococcus pneumoniae
 pdf(file = "Streptococcus_pneumoniaeBarPlot.pdf", width = 12, height = 12)
table(covid.df$DiagGroups,covid.df$S.PNEU)
 ftable(covid.df$HIV_status, covid.df$DiagGroups,covid.df$S.PNEU)
 (sp.out<- DiagBarPlot.fn(patho.var ="S.PNEU",Title1="Streptococcus pneumoniae"))
 dev.off()
 
  ##(2) Klebsiella pneumoniae
 pdf(file = "Klebsiella_pneumoniaeBarPlot.pdf", width = 12, height = 12)
 table(covid.df$DiagGroups,covid.df$K.PNEU)
 ftable(covid.df$HIV_status, covid.df$DiagGroups,covid.df$K.PNEU)
 (kp.out<- DiagBarPlot.fn(patho.var ="K.PNEU",Title1="Klebsiella pneumoniae"))
 dev.off()
 
 ##(3) Staphylococcus aureus
 pdf(file = "Staphylococcus_aureusBarPlot.pdf", width = 12, height = 12)
 table(covid.df$DiagGroups,covid.df$S.AUR)
 ftable(covid.df$HIV_status, covid.df$DiagGroups,covid.df$S.AUR)
 (su.out<- DiagBarPlot.fn(patho.var ="S.AUR",Title1="Staphylococcus aureus"))
 dev.off()
  
 pdf(file = "SP_KP_SA_BarPlot.pdf", width = 14, height = 12)
  ##(4) Combined figure
 par(mfrow=c(1,3))
 (sp.out<- DiagBarPlot.fn(patho.var ="S.PNEU",Title1="Streptococcus pneumoniae"))
 (kp.out<- DiagBarPlot.fn(patho.var ="K.PNEU",Title1="Klebsiella pneumoniae"))
 (sa.out<- DiagBarPlot.fn(patho.var ="S.AUR",Title1="Staphylococcus aureus"))
 par(mfrow=c(1,1))
 dev.off()
 
 ##=======================================================================================
 ##Seasonality plots
 ##=======================================================================================
 ##Full calendar year(2020)
 (dates.2020<-seq(as.Date("2020-01-01"), as.Date("2020-12-31"), by = "days"))
 #Data frame
 df.2020<-data.frame(Date=dates.2020, Date2=format(dates.2020,"%Y%b%d"), Days=days(dates.2020),
                     Weeks=cut(dates.2020, "weeks", start.on.monday = FALSE),
                     Month1=format(x=dates.2020, "%b"),
                     Month2=format(x=dates.2020, "%m"), Year=format(x=dates.2020, "%Y"))
 (df.2020$Weeks2<-format(as.Date(df.2020$Weeks), "%Y%b%d"))
 head(df.2020)
 
##National data
 mw_epi_df <- read_excel("20201211_Malawi_Epidemic _to_05Oct2020.xlsx",skip = 2)
 mw_epi_df$ID<- paste(mw_epi_df$Date)
 
 ##Merge with COVID data
 ssn.vars<-c("date_of_recruitment","Sample_ID","DiagGroups","PCRtest")
 head(df0<-covid.df[,ssn.vars])
 (df0$ID<-paste(gsub(pattern =" UTC", replacement = "",x= df0$date_of_recruitment)))
 df.2020$ID<-paste(df.2020$Date); class(df.2020$ID)
 
 ##Merging
 intersect(df.2020$ID, df0$ID)
 setdiff(df0$ID,df.2020$ID)
 ssn.df0<-merge(df.2020, mw_epi_df, by="ID", all.x=TRUE)
 ssn.df<-merge( ssn.df0, df0, by="ID", all.x=TRUE)
 table(ssn.df$d); table(ssn.df$PCRtest)
 class(ssn.df$ConfirmedCases)
 
 ##Weekly summaries
table(ssn.df$Weeks,ssn.df$PCRtest)
n1<-tapply(ssn.df$ConfirmedCases,ssn.df$Weeks, function(x) sum(x, na.rm = TRUE)); dim(n1)
(n2<-data.matrix(table(ssn.df$Weeks,ssn.df$PCRtest))); dim(n2)
(ssnbp.df<-data.frame(Weeks=names(n1),Total=n1, PCR_Negative=n2[,"Negative"],
                      PCR_Positive=n2[,"Positive"], 
                      Percent=apply(n2,1, function(x) 100*x[2]/sum(x))))

# #####The plot
pdf(file = "SeasonalityPlot.pdf",  width = 16, height = 12)
par(mar=c(8,8,4,7))
ssnbp.df2<-ssnbp.df[-1,]
(names1<-format(as.Date(ssnbp.df2$Weeks), "%b%d"))

##First layer (National data)
bpy1<-ssnbp.df2$Total
bpx<-barplot(bpy1, las=2, col="gray", names=names1, cex.axis=1.3, 
             main="", cex.lab=2, font.lab=2, ylim=c(0,1200))
mtext(text = "Weekly dates", side = 1, line=6, font=2, cex=2)
mtext(text = "National COVID-19 cases/week", side = 2, line=6, font=1, cex=2)
 
##Second axis 
par(new = TRUE)
plot(bpx, ssnbp.df2$PCR_Negative, type = "l", axes = FALSE, bty = "n", xlab = "", 
     ylab = "", lwd=2, pch=1, col="skyblue", lty=1, ylim=c(0,8), cex=1.5)
lines(bpx, ssnbp.df2$PCR_Positive, type = "l", lwd=2, pch=16, col="red", lty=1, cex=1.5)
axis(side=4, at = 0:10, las=1, cex=1.5, col="purple")
mtext("Study cases/week", side=4, line=4, font=1, cex=2)

##Legend
legend("topright", c("National Data", "RT-PCR Negative", "RT-PCR Positive"),
       fill=c("gray", "skyblue","red"), cex=1.5, pt.cex=3)

par(par(mar=c(5,4,4,2)))

dev.off()

#=========================================================================================
##Number of isolates Boxplot 
#=========================================================================================
##(1)Stats
#(a)General pvalue
(pv3<-kruskal.test(x =covid.df$Pathogens, g=factor(covid.df$DiagGroups))[["p.value"]])

#(b) "PCR-IgG-" vs "PCR-IgG+"
tapply(covid.df$Pathogens, covid.df$DiagGroups, function(x) median(x, na.rm = TRUE))
tapply(covid.df$Pathogens, covid.df$DiagGroups, function(x) mean(x, na.rm = TRUE))

(pv12<-wilcox.test(formula=Pathogens~DiagGroups, 
            data = covid.df[!covid.df$DiagGroups%in%"PCR-confirmed COVID-19",])[["p.value"]])

#(c)  "PCR+confirmed COVID-19" vs "PCR-IgG-"
pv13<-wilcox.test(formula=Pathogens~DiagGroups, 
            data = covid.df[!covid.df$DiagGroups%in%"PCR-IgG+ SARI",])[["p.value"]]

#(d) "PCR+confirmed COVID-19" vs  "PCR-IgG+"
(pv23<-wilcox.test(formula=Pathogens~DiagGroups, 
            data = covid.df[!covid.df$DiagGroups%in%"PCR-IgG- SARI",])[["p.value"]])

#(e) Data frame
(path.pval.df<-data.frame(Comparison=c("Overall", "PCR-IgG+ SARI vs PCR-IgG- SARI", 
                      "PCR-confirmed COVID-19 vs PCR-IgG- SARI", "PCR-confirmed COVID-19 vs PCR-IgG+ SARI"),
                        Pvalues=c(pv3, pv12, pv13, pv23)))
write.csv(x = path.pval.df, file = "PathogenBoxPlotPvalues.csv")

##(2) The plot
pdf(file="PathogenBoxPlot.pdf", width = 12, height = 12)
par(mar=c(8,8,4,2))
boxplot(covid.df$Pathogens~covid.df$DiagGroups,pch=16,names=legend1[c(3:1)],lwd=2, cex=0.1,
        border=group.cols1,las=1, cex.lab=2,cex.axis=1.2)
mtext(text = "Groups",side = 1,line = 5, font=2, cex=2)
mtext(text = "Number of positive isolates",side = 2,line = 5, font=2, cex=2)

stripchart(covid.df$Pathogens~covid.df$DiagGroups, vertical = TRUE, method = "jitter",
           add=TRUE, col=group.cols1, cex=3, pch=16)
par(mar=c(5,4,4,2))
dev.off()

#=========================================================================================
###Saving outputs to Excel
#=========================================================================================
##(1)Saving raw data to CSV file  
write.csv(x = covid.df, file = "MergedCovidDataFrame2021Jan27.csv")

##(2)Results

##Saving workbook
wb <- createWorkbook()
addWorksheet(wb, "DescriptiveResultsTable")
addWorksheet(wb, "PathogenComb_With_Kleb")
addWorksheet(wb, "PathogenComb_Without_Kleb")
writeData(wb = wb,sheet ="DescriptiveResultsTable", x = tab1)
writeData(wb = wb,sheet ="PathogenComb_With_Kleb", x = iso.tab1 )
writeData(wb = wb,sheet ="PathogenComb_Without_Kleb", x = iso.tab2)
saveWorkbook(wb, file = "ResultTables2021Jan26.xlsx", overwrite = TRUE)


###Pairwise comparisons
pw.comb.wb <- createWorkbook()
addWorksheet(pw.comb.wb, "SinglePathogen")
addWorksheet(pw.comb.wb, "PathogenCombinations")

writeData(wb=pw.comb.wb ,sheet ="SinglePathogen", x =patho.res.df1)
writeData(wb=pw.comb.wb ,sheet ="PathogenCombinations", x =  pathocomb.res.df1 )
saveWorkbook(pw.comb.wb, file = "PairWiseComparisonTables2021Jan26.xlsx", overwrite = TRUE)

